2. Initial Formatting

2.1 Set up file paths
# Paths to the two pilot data files: the Qualtrics numeric export and the
# hand-coded Q1 rewrite responses.
# NOTE(review): these are machine-specific absolute paths and will break for
# any other user/machine — consider here::here() or relative paths. TODO confirm
# with project conventions.
numeric_file <- "/Users/dgkamper/Library/CloudStorage/GoogleDrive-dgkamper@gmail.com/My Drive/DGK Lab/Collaborations/BlankLang Lab/DGK Lab - BlankLangLab - Second Amendment Language Dependencies/Data/Pilot_Numeric.csv"
coded_file <- "/Users/dgkamper/Library/CloudStorage/GoogleDrive-dgkamper@gmail.com/My Drive/DGK Lab/Collaborations/BlankLang Lab/DGK Lab - BlankLangLab - Second Amendment Language Dependencies/Data/q1_rewrite_responses_for_coding_edited.csv"
cat("File paths set\n")
## File paths set
cat("Numeric data file:", basename(numeric_file), "\n")
## Numeric data file: Pilot_Numeric.csv
2.2 Load and Clean
Qualtrics Data
# Load raw Qualtrics export
df_raw <- read_csv(numeric_file, show_col_types = FALSE)
cat("Loaded raw data:", nrow(df_raw), "rows,", ncol(df_raw), "columns\n")
## Loaded raw data: 145 rows, 140 columns
# Remove Qualtrics metadata rows (rows 1-2 contain question text and import IDs)
# Also remove test/preview rows (rows 1-9 after metadata removal)
# NOTE(review): the test-row count (9) is hard-coded by position; if the export
# is re-downloaded with different preview rows this silently removes real data.
# Consider filtering on Status/DistributionChannel instead — verify with export.
df_numeric <- df_raw[-c(1, 2), ] # Remove metadata
df_numeric <- df_numeric[-c(1:9), ] # Remove test rows
cat("After removing metadata and test rows:", nrow(df_numeric), "rows\n")
## After removing metadata and test rows: 134 rows
# Type convert columns
df_numeric <- type.convert(df_numeric, as.is = TRUE)
# Remove incomplete responses (Finished != 1)
# NOTE(review): filter() also silently drops rows where Finished is NA.
df_numeric <- df_numeric %>%
filter(Finished == 1)
cat("After removing incomplete responses:", nrow(df_numeric), "rows\n")
## After removing incomplete responses: 124 rows
cat("Data cleaning complete\n")
## Data cleaning complete
# Clean numeric data
df_numeric <- df_numeric %>%
mutate(
# Convert numeric columns that may have been read as character
across(matches("^(Interpretation|Confidence|IsTrue|Remove)[0-9]+$"), as.numeric),
across(c(AttentionCheck1, Police1, Police2, Police3, Seriousness), as.numeric),
across(c(Progress, `Duration (in seconds)`), as.numeric),
across(c(Age), as.numeric)
)
# Standardize Prolific ID column name
# Case-insensitive search so both "Prolific" and "PROLIFIC_PID" variants match.
prolific_cols <- names(df_numeric)[str_detect(names(df_numeric), "(?i)prolific")]
if (length(prolific_cols) > 0 && !"Prolific" %in% names(df_numeric)) {
df_numeric <- df_numeric %>%
rename(Prolific = !!prolific_cols[length(prolific_cols)]) # Use last one (usually PROLIFIC_PID)
cat("Standardized Prolific ID column\n")
}
cat("Data cleaning complete\n")
## Data cleaning complete
2.3 Reshape Data to Long Format
# Handle potential typo in column name (Interpreation19 vs Interpretation19)
if ("Interpreation19" %in% names(df_numeric)) {
df_numeric <- df_numeric %>%
rename(Interpretation19 = Interpreation19)
cat("Fixed typo: Interpreation19 -> Interpretation19\n")
}
## Fixed typo: Interpreation19 -> Interpretation19
# Reshape to long format - this will capture BOTH numeric responses AND text (Rewrite)
# The ".value" spec splits each "<Measure><NN>" column into a Measure column
# plus a "sentence" id, so one row per participant x sentence results.
df_long <- df_numeric %>%
pivot_longer(
cols = matches("^(Rewrite|Interpretation|Confidence|IsTrue|Remove)[0-9]+$"),
names_to = c(".value", "sentence"),
names_pattern = "([A-Za-z]+)([0-9]+)"
) %>%
# Each participant saw only one sentence; filter out empty rows
filter(!is.na(Interpretation)) %>% # Use Interpretation as the key (always answered)
# Convert sentence identifier to numeric
mutate(sentence = as.integer(sentence))
cat("Reshaped to long format:", nrow(df_long), "observations\n")
## Reshaped to long format: 124 observations
cat("Unique sentences:", n_distinct(df_long$sentence), "\n")
## Unique sentences: 20
cat("Observations with Rewrite responses:", sum(!is.na(df_long$Rewrite)), "\n")
## Observations with Rewrite responses: 124
cat("Sample size range per sentence:",
min(table(df_long$sentence)), "-", max(table(df_long$sentence)), "\n")
## Sample size range per sentence: 5 - 7
# Verify we have the key columns including Rewrite
key_cols <- c("ResponseId", "Prolific", "sentence", "Rewrite", "Interpretation",
"Confidence", "IsTrue", "Remove", "AttentionCheck1", "Police1",
"Police2", "Police3", "Seriousness")
missing_cols <- setdiff(key_cols, names(df_long))
if (length(missing_cols) > 0) {
cat("\nWARNING: Missing expected columns:", paste(missing_cols, collapse = ", "), "\n")
} else {
cat("\nAll key columns present (including Rewrite text)\n")
}
##
## All key columns present (including Rewrite text)
# Display sample of Rewrite responses to verify extraction
cat("\nSample Rewrite responses:\n")
##
## Sample Rewrite responses:
sample_rewrites <- df_long %>%
filter(!is.na(Rewrite)) %>%
select(Prolific, sentence, Rewrite) %>%
slice_head(n = 3)
print(sample_rewrites)
## # A tibble: 3 × 3
## Prolific sentence Rewrite
## <chr> <int> <chr>
## 1 6690c433c8bedd2996d7f973 16 A shelter with good conditions is a necessi…
## 2 5b26da1934ffd00001b45d8d 19 It is important to ensure that communities …
## 3 674b71f1c045ef7f014cc567 11 Scientists need to know how to use a telesc…
2.4 Apply Exclusion
Criteria
cat("=== EXCLUSION CRITERIA ===\n")
## === EXCLUSION CRITERIA ===
cat("1. Failed attention check (AttentionCheck1 != 3)\n")
## 1. Failed attention check (AttentionCheck1 != 3)
cat("2. Failed 2+ comprehension checks (Police1, Police2, Police3)\n")
## 2. Failed 2+ comprehension checks (Police1, Police2, Police3)
cat("3. Did not report taking survey seriously (Seriousness != 4)\n\n")
## 3. Did not report taking survey seriously (Seriousness != 4)
# Track exclusions
n_initial <- nrow(df_long)
df_analysis <- df_long %>%
mutate(
# Attention check: AttentionCheck1 == 3 is correct
screen_pass = (AttentionCheck1 == 3),
# Comprehension checks (0 = correct, 1 = incorrect)
# NOTE(review): if_else() propagates NA, so a missing Police response makes
# comp_total NA; such rows are dropped by the filter below but are not
# counted in any of the exclusion tallies — verify no NAs exist here.
comp1 = if_else(Police1 == 2, 0, 1), # Police1 == 2 is correct
comp2 = if_else(Police2 == 2, 0, 1), # Police2 == 2 is correct
comp3 = if_else(Police3 == 1, 0, 1), # Police3 == 1 is correct
comp_total = comp1 + comp2 + comp3
)
# Count exclusions
# Counts are sequential/hierarchical: each bucket only counts participants
# who survived the previous criteria, so the three tallies do not overlap.
n_failed_attention <- sum(!df_analysis$screen_pass)
n_failed_comp <- sum(df_analysis$screen_pass & df_analysis$comp_total >= 2)
n_not_serious <- sum(df_analysis$screen_pass & df_analysis$comp_total < 2 &
df_analysis$Seriousness != 4)
# Apply filters
df_analysis <- df_analysis %>%
filter(
screen_pass == TRUE,
comp_total < 2,
Seriousness == 4
)
n_final <- nrow(df_analysis)
cat("Initial N:", n_initial, "\n")
## Initial N: 124
cat("Excluded - Failed attention check:", n_failed_attention, "\n")
## Excluded - Failed attention check: 0
cat("Excluded - Failed 2+ comprehension:", n_failed_comp, "\n")
## Excluded - Failed 2+ comprehension: 0
cat("Excluded - Not serious:", n_not_serious, "\n")
## Excluded - Not serious: 3
cat("Final N:", n_final, "\n")
## Final N: 121
cat("Retention rate:", round(n_final/n_initial*100, 1), "%\n")
## Retention rate: 97.6 %
2.5 Create Analysis
Variables
# Derive the analysis variables used in all downstream models and plots.
df_analysis <- df_analysis %>%
mutate(
# Q2: Binary interpretation variable for logistic regression
Q2_Interpretation_bin = case_when(
Interpretation == 1 ~ 0, # "Only in cases when necessary" (conditional)
Interpretation == 2 ~ 1, # "Always necessary" (categorical)
TRUE ~ NA_real_
),
# Q2: Factor version for plotting
Q2_Interpretation_factor = factor(
Interpretation,
levels = c(1, 2),
labels = c("Only in cases when necessary", "Always necessary")
),
# Q3: Confidence (ensure numeric)
Confidence = as.numeric(Confidence),
# Q4: Ordered factor for perceived impact analysis
# NOTE(review): levels are taken from the observed data but labels are
# hard-coded to 3 values — factor() errors if fewer than 3 distinct IsTrue
# values are observed. Consider fixed levels (e.g. 1:3) — confirm coding.
Q4_IsTrue_ord = factor(
IsTrue,
ordered = TRUE,
levels = sort(unique(na.omit(IsTrue))),
labels = c("Not crucial", "Somewhat important", "Absolutely essential")
),
# Q5: Centered removal impact variable (4 = neutral midpoint becomes 0)
# Assumes Remove is on a 1-7 scale with 4 as midpoint — TODO confirm.
Q5_Remove_centered = as.numeric(Remove) - 4
)
cat("Analysis variables created successfully\n")
## Analysis variables created successfully
2.6 Define Sentence
Classifications
# Classify sentences by content type based on operative clause
# Lookup table: one row per stimulus sentence (1-20), giving its content-type
# classification and a short human-readable label for plots and tables.
sentence_classifications <- tibble(
sentence = 1:20,
sentence_type = factor(c(
"Right", # 1: archive access
"Other", # 2: orchard efforts
"Ability", # 3: fire safety
"Right", # 4: tradition preservation
"Other", # 5: dam expertise
"Ability", # 6: marketplace transactions
"Ability", # 7: road construction
"Ability", # 8: sundial reading
"Ability", # 9: masterpiece creation
"Right", # 10: beehive access
"Ability", # 11: telescope observation
"Right", # 12: shelter access
"Other", # 13: food reserve access
"Other", # 14: lighthouse guidance
"Ability", # 15: garden cultivation
"Right", # 16: insulated shelter
"Right", # 17: weather station data
"Right", # 18: library access
"Right", # 19: aqueduct water
"Ability" # 20: weathervane reading
), levels = c("Right", "Ability", "Other")),
# Add descriptive labels
sentence_label = c(
"Archive", "Orchard", "Fire", "Tradition", "Dam",
"Marketplace", "Road", "Sundial", "Masterpiece", "Beehive",
"Telescope", "Shelter", "Food Reserve", "Lighthouse", "Garden",
"Insulated Shelter", "Weather Station", "Library", "Aqueduct", "Weathervane"
)
)
# Join with analysis data
# left_join keeps every response row; sentence ids outside 1-20 (if any)
# would get NA classifications rather than being dropped.
df_analysis <- df_analysis %>%
left_join(sentence_classifications, by = "sentence")
cat("Sentence classifications added\n")
## Sentence classifications added
cat(" Right:", sum(sentence_classifications$sentence_type == "Right"), "items\n")
## Right: 8 items
cat(" Ability:", sum(sentence_classifications$sentence_type == "Ability"), "items\n")
## Ability: 8 items
cat(" Other:", sum(sentence_classifications$sentence_type == "Other"), "items\n")
## Other: 4 items
2.7 Create Consistent
Theme for Plots
# Report-wide plot styling ----
# theme_analysis layers this report's tweaks (centered bold titles, bottom
# legend, bold facet strips, no minor gridlines) on top of theme_minimal().
theme_analysis <- theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 10),
    strip.text = element_text(face = "bold", size = 11),
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 11, face = "italic")
  )
# Blue/grey palettes keyed by the exact factor labels used in the data.
colors_interpretation <- setNames(
  c("#5A7A9B", "#2E5266"), # slate blue; dark blue-grey
  c("Only in cases when necessary", "Always necessary")
)
colors_sentence_type <- setNames(
  c("#4A6FA5", "#7D98B3", "#9BADBF"), # medium blue; light blue-grey; pale blue-grey
  c("Right", "Ability", "Other")
)
cat("Custom theme and color palettes defined\n")
## Custom theme and color palettes defined
3. Sample
Description
3.1 Final Sample
Characteristics
# Create summary table
# One-row overview of the final analytic sample.
sample_summary <- df_analysis %>%
summarise(
N = n(),
N_sentences = n_distinct(sentence),
Mean_age = mean(as.numeric(Age), na.rm = TRUE),
SD_age = sd(as.numeric(Age), na.rm = TRUE),
# NOTE(review): assumes Gender == 2 codes "female" — confirm against the
# Qualtrics codebook before reporting.
Pct_female = mean(Gender == 2, na.rm = TRUE) * 100,
Mean_duration_sec = mean(`Duration (in seconds)`, na.rm = TRUE),
SD_duration_sec = sd(`Duration (in seconds)`, na.rm = TRUE)
)
kable(sample_summary, digits = 2,
caption = "Sample Characteristics",
col.names = c("N", "# Sentences", "Mean Age", "SD Age",
"% Female", "Mean Duration (s)", "SD Duration (s)"))
Sample Characteristics
| 121 |
20 |
36.71 |
12.62 |
59.5 |
870.1 |
3955.77 |
3.2 Participants Per
Sentence
# Check distribution of participants across sentences
# count() yields one row per sentence with column n = number of participants.
n_per_sentence <- df_analysis %>%
count(sentence, sentence_label, sentence_type) %>%
arrange(sentence)
kable(n_per_sentence,
caption = "Number of Participants per Sentence",
col.names = c("Sentence #", "Label", "Type", "N"))
Number of Participants per Sentence
| 1 |
Archive |
Right |
5 |
| 2 |
Orchard |
Other |
7 |
| 3 |
Fire |
Ability |
7 |
| 4 |
Tradition |
Right |
6 |
| 5 |
Dam |
Other |
7 |
| 6 |
Marketplace |
Ability |
6 |
| 7 |
Road |
Ability |
6 |
| 8 |
Sundial |
Ability |
6 |
| 9 |
Masterpiece |
Ability |
7 |
| 10 |
Beehive |
Right |
6 |
| 11 |
Telescope |
Ability |
6 |
| 12 |
Shelter |
Right |
7 |
| 13 |
Food Reserve |
Other |
6 |
| 14 |
Lighthouse |
Other |
6 |
| 15 |
Garden |
Ability |
5 |
| 16 |
Insulated Shelter |
Right |
7 |
| 17 |
Weather Station |
Right |
5 |
| 18 |
Library |
Right |
5 |
| 19 |
Aqueduct |
Right |
5 |
| 20 |
Weathervane |
Ability |
6 |
# Horizontal bar chart of per-sentence sample sizes, colored by sentence type;
# reorder() sorts the bars by sentence number.
distribution_plot <- ggplot(
  n_per_sentence,
  aes(x = reorder(sentence_label, sentence), y = n, fill = sentence_type)
) +
  geom_col(alpha = 0.8) +
  scale_fill_manual(values = colors_sentence_type) +
  coord_flip() +
  labs(
    title = "Participant Distribution Across Sentences",
    x = "Sentence",
    y = "Number of Participants",
    fill = "Sentence Type"
  ) +
  theme_analysis
print(distribution_plot)

3.3 Data Quality
Check
# Check for missing data
# Counts NAs in each of the four primary outcome variables.
missing_summary <- df_analysis %>%
summarise(
across(c(Interpretation, Confidence, IsTrue, Remove),
~sum(is.na(.)),
.names = "Missing_{.col}")
)
cat("Missing data summary:\n")
## Missing data summary:
print(missing_summary)
## # A tibble: 1 × 4
## Missing_Interpretation Missing_Confidence Missing_IsTrue Missing_Remove
## <int> <int> <int> <int>
## 1 0 0 0 0
# Check response time outliers
# Five-number-style summary of completion times (no exclusions applied here;
# the max of ~43831s suggests at least one participant left the tab open).
duration_summary <- df_analysis %>%
summarise(
Min = min(`Duration (in seconds)`, na.rm = TRUE),
Q1 = quantile(`Duration (in seconds)`, 0.25, na.rm = TRUE),
Median = median(`Duration (in seconds)`, na.rm = TRUE),
Q3 = quantile(`Duration (in seconds)`, 0.75, na.rm = TRUE),
Max = max(`Duration (in seconds)`, na.rm = TRUE)
)
cat("\nResponse time distribution (seconds):\n")
##
## Response time distribution (seconds):
print(duration_summary)
## # A tibble: 1 × 5
## Min Q1 Median Q3 Max
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 188 319 422 567 43831
5. Item-Level Comprehensive Analysis
5.1 Complete Item
Summary Table
# Create comprehensive item-level summary
# One row per sentence with mean/SD of each outcome; N per item is 5-7.
item_summary <- df_analysis %>%
group_by(sentence, sentence_label, sentence_type) %>%
summarise(
N = n(),
# Q2: Interpretation
Pct_Categorical = mean(Q2_Interpretation_bin, na.rm = TRUE) * 100,
# Q3: Confidence
Mean_Confidence = mean(Confidence, na.rm = TRUE),
SD_Confidence = sd(Confidence, na.rm = TRUE),
# Q4: Perceived Impact (as numeric 1-3)
Mean_Impact = mean(as.numeric(Q4_IsTrue_ord), na.rm = TRUE),
# Q5: Removal
Mean_Removal = mean(Q5_Remove_centered, na.rm = TRUE),
SD_Removal = sd(Q5_Remove_centered, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(sentence)
kable(item_summary, digits = 2,
caption = "Comprehensive Item-Level Summary",
col.names = c("Sent", "Label", "Type", "N", "% Categ",
"M Conf", "SD Conf", "M Impact", "M Removal", "SD Removal"))
Comprehensive Item-Level Summary
| 1 |
Archive |
Right |
5 |
80.00 |
4.40 |
1.52 |
2.20 |
-0.20 |
1.64 |
| 2 |
Orchard |
Other |
7 |
100.00 |
6.14 |
0.69 |
2.43 |
-1.43 |
1.27 |
| 3 |
Fire |
Ability |
7 |
100.00 |
4.57 |
1.27 |
2.14 |
0.14 |
1.57 |
| 4 |
Tradition |
Right |
6 |
83.33 |
5.67 |
0.82 |
1.83 |
-0.83 |
1.60 |
| 5 |
Dam |
Other |
7 |
85.71 |
5.71 |
0.76 |
2.57 |
-0.29 |
1.38 |
| 6 |
Marketplace |
Ability |
6 |
100.00 |
6.00 |
0.63 |
1.67 |
-0.50 |
2.07 |
| 7 |
Road |
Ability |
6 |
100.00 |
5.17 |
1.17 |
1.67 |
-1.00 |
1.10 |
| 8 |
Sundial |
Ability |
6 |
66.67 |
4.50 |
1.76 |
1.67 |
0.50 |
2.07 |
| 9 |
Masterpiece |
Ability |
7 |
71.43 |
5.00 |
1.83 |
2.14 |
0.14 |
1.86 |
| 10 |
Beehive |
Right |
6 |
100.00 |
4.33 |
1.37 |
2.00 |
-0.67 |
1.86 |
| 11 |
Telescope |
Ability |
6 |
66.67 |
5.33 |
0.52 |
2.33 |
0.50 |
1.87 |
| 12 |
Shelter |
Right |
7 |
71.43 |
5.86 |
1.57 |
1.71 |
0.00 |
2.31 |
| 13 |
Food Reserve |
Other |
6 |
100.00 |
5.50 |
1.05 |
2.00 |
0.17 |
1.94 |
| 14 |
Lighthouse |
Other |
6 |
83.33 |
5.83 |
1.47 |
2.17 |
0.83 |
0.98 |
| 15 |
Garden |
Ability |
5 |
100.00 |
4.20 |
1.92 |
2.00 |
-1.00 |
1.73 |
| 16 |
Insulated Shelter |
Right |
7 |
71.43 |
5.43 |
1.27 |
2.43 |
-0.29 |
1.98 |
| 17 |
Weather Station |
Right |
5 |
60.00 |
5.00 |
1.22 |
1.80 |
-1.00 |
1.22 |
| 18 |
Library |
Right |
5 |
80.00 |
6.00 |
1.73 |
2.60 |
0.20 |
1.10 |
| 19 |
Aqueduct |
Right |
5 |
60.00 |
5.00 |
2.35 |
2.20 |
-0.20 |
1.64 |
| 20 |
Weathervane |
Ability |
6 |
100.00 |
3.83 |
1.33 |
2.00 |
0.50 |
2.17 |
# Export for reference
# Written to the working directory (unlike the input files, which use
# absolute paths) — the output lands wherever the report is rendered.
write_csv(item_summary, "item_level_summary.csv")
cat("\nItem-level summary exported to: item_level_summary.csv\n")
##
## Item-level summary exported to: item_level_summary.csv
5.2 Identify
Problematic Items
# Outlier detection across item-level summary measures.
# Standardizes each measure across the 20 items, then flags items whose
# z-score exceeds |2| on any measure. Depends on `item_summary` from 5.1.
cat("=== OUTLIER DETECTION ===\n\n")
## === OUTLIER DETECTION ===
# Calculate z-scores for each measure.
# scale() returns a 1-column matrix; [,1] drops it back to a vector.
# NOTE(review): scale() yields NaN if a measure has zero variance across items.
item_summary_z <- item_summary %>%
mutate(
z_categorical = scale(Pct_Categorical)[,1],
z_confidence = scale(Mean_Confidence)[,1],
z_impact = scale(Mean_Impact)[,1],
z_removal = scale(Mean_Removal)[,1]
)
# Flag outliers (|z| > 2) on at least one measure
outliers <- item_summary_z %>%
filter(
abs(z_categorical) > 2 |
abs(z_confidence) > 2 |
abs(z_impact) > 2 |
abs(z_removal) > 2
)
if (nrow(outliers) > 0) {
cat("Items with extreme values (|z| > 2):\n\n")
# Label which measure triggered each flag.
# BUG FIX: the original case_when placed "Multiple" in the TRUE fall-through,
# which was unreachable — every row here matches at least one condition and
# case_when stops at the first TRUE, so multi-measure outliers were labeled
# with only the first matching measure. Count flags first so "Multiple" works.
outlier_report <- outliers %>%
select(sentence, sentence_label,
z_categorical, z_confidence, z_impact, z_removal) %>%
mutate(
n_flagged = (abs(z_categorical) > 2) + (abs(z_confidence) > 2) +
(abs(z_impact) > 2) + (abs(z_removal) > 2),
Outlier_On = case_when(
n_flagged > 1 ~ "Multiple",
abs(z_categorical) > 2 ~ "Categorical%",
abs(z_confidence) > 2 ~ "Confidence",
abs(z_impact) > 2 ~ "Impact",
TRUE ~ "Removal"
)
) %>%
select(-n_flagged) # drop the helper so kable's col.names still line up
kable(outlier_report, digits = 2,
caption = "Outlier Items",
col.names = c("Sent", "Label", "z Categ", "z Conf", "z Impact", "z Removal", "Outlier On"))
} else {
cat("No major outliers detected (all |z| < 2)\n")
}
## No major outliers detected (all |z| < 2)
# Items with low N — too few observations to trust their summary statistics
low_n <- item_summary %>%
filter(N < 5)
if (nrow(low_n) > 0) {
cat("\nWARNING: Items with low sample size (N < 5):\n")
print(low_n %>% select(sentence, sentence_label, N))
}
5.3 Multi-Measure
Item Visualization
# Create comprehensive heatmap
# Long format with per-measure z-scores so all four measures share one
# color scale despite different native units.
item_heatmap_data <- item_summary %>%
select(sentence_label, Pct_Categorical, Mean_Confidence, Mean_Impact, Mean_Removal) %>%
pivot_longer(cols = -sentence_label, names_to = "Measure", values_to = "Value") %>%
group_by(Measure) %>%
mutate(Value_scaled = scale(Value)[,1]) %>%
ungroup()
# Rename measures for display
item_heatmap_data <- item_heatmap_data %>%
mutate(
Measure = factor(Measure,
levels = c("Pct_Categorical", "Mean_Confidence",
"Mean_Impact", "Mean_Removal"),
labels = c("% Categorical", "Confidence",
"Perceived Impact", "Removal Impact"))
)
plot_item_heatmap <- ggplot(item_heatmap_data,
aes(x = Measure, y = sentence_label, fill = Value_scaled)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "#2E5266", mid = "white", high = "#0072B2",
midpoint = 0, name = "Z-score") +
labs(
title = "Item-Level Performance Heatmap",
subtitle = "Standardized scores across all measures (z-scores)",
x = "Measure",
y = "Sentence"
) +
theme_analysis +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(size = 8)
)
print(plot_item_heatmap)

5.4 Item Reliability
Analysis
# Calculate within-sentence variance for each measure
# Higher variance suggests items that elicit more diverse responses
# NOTE: with N of 5-7 per item these variances are very noisy estimates.
item_variance <- df_analysis %>%
group_by(sentence, sentence_label) %>%
summarise(
Var_Interpretation = var(Q2_Interpretation_bin, na.rm = TRUE),
Var_Confidence = var(Confidence, na.rm = TRUE),
Var_Removal = var(Q5_Remove_centered, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(Var_Interpretation))
cat("=== ITEM VARIANCE ===\n")
## === ITEM VARIANCE ===
cat("Items with high variance show more disagreement among participants\n\n")
## Items with high variance show more disagreement among participants
kable(item_variance, digits = 3,
caption = "Item-Level Variance (Higher = More Disagreement)",
col.names = c("Sent", "Label", "Var Interp", "Var Conf", "Var Removal"))
Item-Level Variance (Higher = More Disagreement)
| 17 |
Weather Station |
0.300 |
1.500 |
1.500 |
| 19 |
Aqueduct |
0.300 |
5.500 |
2.700 |
| 8 |
Sundial |
0.267 |
3.100 |
4.300 |
| 11 |
Telescope |
0.267 |
0.267 |
3.500 |
| 16 |
Insulated Shelter |
0.238 |
1.619 |
3.905 |
| 12 |
Shelter |
0.238 |
2.476 |
5.333 |
| 9 |
Masterpiece |
0.238 |
3.333 |
3.476 |
| 1 |
Archive |
0.200 |
2.300 |
2.700 |
| 18 |
Library |
0.200 |
3.000 |
1.200 |
| 4 |
Tradition |
0.167 |
0.667 |
2.567 |
| 14 |
Lighthouse |
0.167 |
2.167 |
0.967 |
| 5 |
Dam |
0.143 |
0.571 |
1.905 |
| 2 |
Orchard |
0.000 |
0.476 |
1.619 |
| 3 |
Fire |
0.000 |
1.619 |
2.476 |
| 6 |
Marketplace |
0.000 |
0.400 |
4.300 |
| 7 |
Road |
0.000 |
1.367 |
1.200 |
| 10 |
Beehive |
0.000 |
1.867 |
3.467 |
| 13 |
Food Reserve |
0.000 |
1.100 |
3.767 |
| 15 |
Garden |
0.000 |
3.700 |
3.000 |
| 20 |
Weathervane |
0.000 |
1.767 |
4.700 |
# Items with highest disagreement on interpretation
# Keeps items strictly above the 75th percentile of Var_Interpretation
# (top-quartile cutoff; ties at the quantile value are excluded).
high_variance_items <- item_variance %>%
filter(Var_Interpretation > quantile(Var_Interpretation, 0.75, na.rm = TRUE))
cat("\nItems with highest disagreement on interpretation:\n")
##
## Items with highest disagreement on interpretation:
print(high_variance_items %>% select(sentence, sentence_label, Var_Interpretation))
## # A tibble: 5 × 3
## sentence sentence_label Var_Interpretation
## <int> <chr> <dbl>
## 1 17 Weather Station 0.3
## 2 19 Aqueduct 0.3
## 3 8 Sundial 0.267
## 4 11 Telescope 0.267
## 5 16 Insulated Shelter 0.238
6. Cross-Variable
Relationships
6.1 Correlation
Matrix
# Create numeric versions of all key variables
# Complete-case subset across the four outcomes, renamed for display.
corr_data <- df_analysis %>%
filter(!is.na(Q2_Interpretation_bin), !is.na(Confidence),
!is.na(Q4_IsTrue_ord), !is.na(Q5_Remove_centered)) %>%
mutate(
Q4_numeric = as.numeric(Q4_IsTrue_ord)
) %>%
select(
Interpretation = Q2_Interpretation_bin,
Confidence,
`Perceived Impact` = Q4_numeric,
`Removal Impact` = Q5_Remove_centered
)
# Calculate correlation matrix
# Note: Interpretation is binary, so its Pearson correlations are
# point-biserial; Perceived Impact treats the ordinal 1-3 scale as interval.
cor_matrix <- cor(corr_data, use = "pairwise.complete.obs")
cat("=== CORRELATION MATRIX ===\n")
## === CORRELATION MATRIX ===
print(round(cor_matrix, 3))
## Interpretation Confidence Perceived Impact Removal Impact
## Interpretation 1.000 0.093 0.038 -0.109
## Confidence 0.093 1.000 -0.019 -0.176
## Perceived Impact 0.038 -0.019 1.000 0.136
## Removal Impact -0.109 -0.176 0.136 1.000
# Visualize correlation matrix.
# CHANGE(review): the long-format data is now built with base R instead of
# reshape2::melt() — reshape2 is retired. as.data.frame(as.table(m)) produces
# the same rows in the same (column-major) order with columns Var1/Var2/Freq;
# the third column is renamed to "value" to match what melt() returned.
cor_melted <- as.data.frame(as.table(cor_matrix))
names(cor_melted)[3] <- "value"
plot_correlations <- ggplot(cor_melted, aes(Var1, Var2, fill = value)) +
geom_tile(color = "white") +
geom_text(aes(label = sprintf("%.2f", value)), size = 4, fontface = "bold") +
scale_fill_gradient2(low = "#2E5266", mid = "white", high = "#0072B2",
midpoint = 0, limits = c(-1, 1),
name = "Correlation") +
labs(
title = "Correlation Matrix of Key Variables",
subtitle = "Pearson correlations",
x = NULL,
y = NULL
) +
theme_analysis +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(plot_correlations)

6.2 Confidence
Predicting Perceived Impact
# Does confidence predict how important people think the clause is?
# Cumulative-link (ordinal logistic) model; clm() presumably comes from the
# `ordinal` package loaded earlier in the report — verify setup chunk.
model_conf_impact <- clm(
Q4_IsTrue_ord ~ Confidence,
data = df_analysis,
link = "logit"
)
cat("=== CONFIDENCE PREDICTING PERCEIVED IMPACT ===\n")
## === CONFIDENCE PREDICTING PERCEIVED IMPACT ===
summary(model_conf_impact)
## formula: Q4_IsTrue_ord ~ Confidence
## data: df_analysis
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 121 -115.65 237.30 5(1) 7.19e-12 9.3e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Confidence -0.02706 0.12744 -0.212 0.832
##
## Threshold coefficients:
## Estimate Std. Error z value
## Not crucial|Somewhat important -0.52574 0.68915 -0.763
## Somewhat important|Absolutely essential -0.09115 0.68752 -0.133
# Visualization: stacked proportions of Q4 responses at each confidence level
plot_conf_impact <- df_analysis %>%
filter(!is.na(Confidence), !is.na(Q4_IsTrue_ord)) %>%
ggplot(aes(x = Confidence, fill = Q4_IsTrue_ord)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
scale_fill_brewer(palette = "Blues", name = "Perceived Impact") +
labs(
title = "Confidence by Perceived Impact",
subtitle = "Proportion of Q4 responses at each confidence level",
x = "Confidence Rating (1-7)",
y = "Proportion"
) +
theme_analysis
print(plot_conf_impact)

6.3 Perceived Impact
Predicting Removal Impact
# Does thinking the clause is important predict how much removal matters?
# Linear mixed model with a random intercept per sentence.
# NOTE(review): the fit is singular — the sentence intercept variance is
# estimated at 0 (see output below), so this reduces to ordinary regression;
# consider reporting a plain lm() or justifying the random effect.
model_impact_removal <- lmer(
Q5_Remove_centered ~ as.numeric(Q4_IsTrue_ord) + (1 | sentence),
data = df_analysis
)
## boundary (singular) fit: see help('isSingular')
cat("=== PERCEIVED IMPACT PREDICTING REMOVAL IMPACT ===\n")
## === PERCEIVED IMPACT PREDICTING REMOVAL IMPACT ===
summary(model_impact_removal)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Q5_Remove_centered ~ as.numeric(Q4_IsTrue_ord) + (1 | sentence)
## Data: df_analysis
##
## REML criterion at convergence: 471.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7853 -0.9024 -0.3091 0.8775 2.0642
##
## Random effects:
## Groups Name Variance Std.Dev.
## sentence (Intercept) 0.000 0.000
## Residual 2.841 1.685
## Number of obs: 121, groups: sentence, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.7230 0.3721 -1.943
## as.numeric(Q4_IsTrue_ord) 0.2440 0.1628 1.499
##
## Correlation of Fixed Effects:
## (Intr)
## as.(Q4_IT_) -0.911
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Visualization: removal-impact distribution within each Q4 category
plot_impact_removal <- df_analysis %>%
filter(!is.na(Q4_IsTrue_ord), !is.na(Q5_Remove_centered)) %>%
ggplot(aes(x = Q4_IsTrue_ord, y = Q5_Remove_centered)) +
geom_boxplot(fill = "#0072B2", alpha = 0.7) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
stat_summary(fun = mean, geom = "point", shape = 23, size = 3,
fill = "white", color = "black") +
labs(
title = "Removal Impact by Perceived Importance",
subtitle = "Does viewing clause as important affect perceived removal impact?",
x = "Perceived Importance of Prefatory Clause",
y = "Removal Impact (Centered)"
) +
theme_analysis +
theme(axis.text.x = element_text(angle = 20, hjust = 1))
print(plot_impact_removal)

6.4 Interpretation
and All Other Measures
# Summary of all measures by interpretation type
# Note the group imbalance visible in the table output: N = 19 vs 102.
summary_by_interpretation <- df_analysis %>%
filter(!is.na(Q2_Interpretation_factor)) %>%
group_by(Q2_Interpretation_factor) %>%
summarise(
N = n(),
Mean_Confidence = mean(Confidence, na.rm = TRUE),
SD_Confidence = sd(Confidence, na.rm = TRUE),
Mean_Impact = mean(as.numeric(Q4_IsTrue_ord), na.rm = TRUE),
SD_Impact = sd(as.numeric(Q4_IsTrue_ord), na.rm = TRUE),
Mean_Removal = mean(Q5_Remove_centered, na.rm = TRUE),
SD_Removal = sd(Q5_Remove_centered, na.rm = TRUE),
.groups = "drop"
)
kable(summary_by_interpretation, digits = 2,
caption = "All Measures by Interpretation Type",
col.names = c("Interpretation", "N", "M Conf", "SD Conf",
"M Impact", "SD Impact", "M Removal", "SD Removal"))
All Measures by Interpretation Type
| Only in cases when necessary |
19 |
4.89 |
1.88 |
2.0 |
0.94 |
0.21 |
1.75 |
| Always necessary |
102 |
5.25 |
1.32 |
2.1 |
0.95 |
-0.29 |
1.68 |
7. Summary and Recommendations

7.1 Key Findings Summary
# Two-column summary table of headline results.
# NOTE(review): q4_overall, q5_summary, coded_data, and agreement_rate are
# defined in an earlier section not shown here (the report jumps from
# section 3 to section 5) — verify those chunks still exist and run first.
key_findings <- tibble(
Measure = c(
"Sample Size (after exclusions)",
"Retention Rate",
"Primary Interpretation",
"Mean Confidence",
"Perceived Impact: Not crucial",
"Perceived Impact: Somewhat important",
"Perceived Impact: Absolutely essential",
"Mean Removal Impact (centered)",
"Q2-Q1 Coding Agreement Rate"
),
Result = c(
sprintf("N = %d", nrow(df_analysis)),
sprintf("%.1f%%", nrow(df_analysis)/nrow(df_long)*100),
sprintf("%.1f%% Categorical ('Always necessary')",
mean(df_analysis$Q2_Interpretation_bin, na.rm = TRUE)*100),
sprintf("M = %.2f (SD = %.2f) on 1-7 scale",
mean(df_analysis$Confidence, na.rm = TRUE),
sd(df_analysis$Confidence, na.rm = TRUE)),
sprintf("%.1f%%", q4_overall$pct[1]),
sprintf("%.1f%%", q4_overall$pct[2]),
sprintf("%.1f%%", q4_overall$pct[3]),
sprintf("M = %.2f (SD = %.2f)", q5_summary$Mean, q5_summary$SD),
if(!is.null(coded_data)) sprintf("%.1f%%", agreement_rate*100) else "N/A"
)
)
kable(key_findings,
caption = "Summary of Key Findings")
Summary of Key Findings
| Sample Size (after exclusions) |
N = 121 |
| Retention Rate |
97.6% |
| Primary Interpretation |
84.3% Categorical (‘Always necessary’) |
| Mean Confidence |
M = 5.20 (SD = 1.42) on 1-7 scale |
| Perceived Impact: Not crucial |
40.5% |
| Perceived Impact: Somewhat important |
10.7% |
| Perceived Impact: Absolutely essential |
48.8% |
| Mean Removal Impact (centered) |
M = -0.21 (SD = 1.69) |
| Q2-Q1 Coding Agreement Rate |
65.8% |
7.2 Recommendations
for Full Study
# Item retention/removal recommendations for the full study, driven by
# three screening rules applied to item_summary: adequate N, interpretation
# responses off the ceiling/floor (20-95% categorical), and mean confidence > 4.
cat("=== RECOMMENDATIONS ===\n\n")
## === RECOMMENDATIONS ===
cat("ITEMS TO RETAIN:\n")
## ITEMS TO RETAIN:
retain_items <- item_summary %>%
filter(
N >= 5,
Pct_Categorical > 20 & Pct_Categorical < 95,
Mean_Confidence > 4
) %>%
arrange(sentence)
cat(" - Sentences with balanced responses and adequate confidence:\n")
## - Sentences with balanced responses and adequate confidence:
print(retain_items %>% select(sentence, sentence_label, Pct_Categorical, Mean_Confidence))
## # A tibble: 12 × 4
## sentence sentence_label Pct_Categorical Mean_Confidence
## <int> <chr> <dbl> <dbl>
## 1 1 Archive 80 4.4
## 2 4 Tradition 83.3 5.67
## 3 5 Dam 85.7 5.71
## 4 8 Sundial 66.7 4.5
## 5 9 Masterpiece 71.4 5
## 6 11 Telescope 66.7 5.33
## 7 12 Shelter 71.4 5.86
## 8 14 Lighthouse 83.3 5.83
## 9 16 Insulated Shelter 71.4 5.43
## 10 17 Weather Station 60 5
## 11 18 Library 80 6
## 12 19 Aqueduct 60 5
cat("\n\nITEMS TO REVISE OR REMOVE:\n")
##
##
## ITEMS TO REVISE OR REMOVE:
# Complement of the retain rules (any single failing criterion flags the item).
problematic_items <- item_summary %>%
filter(
N < 5 |
Pct_Categorical < 20 | Pct_Categorical > 95 |
Mean_Confidence < 4
)
if (nrow(problematic_items) > 0) {
cat(" - Items with low N, ceiling/floor effects, or low confidence:\n")
print(problematic_items %>% select(sentence, sentence_label, N, Pct_Categorical, Mean_Confidence))
} else {
cat(" - No major problematic items identified\n")
}
## - Items with low N, ceiling/floor effects, or low confidence:
## # A tibble: 8 × 5
## sentence sentence_label N Pct_Categorical Mean_Confidence
## <int> <chr> <int> <dbl> <dbl>
## 1 2 Orchard 7 100 6.14
## 2 3 Fire 7 100 4.57
## 3 6 Marketplace 6 100 6
## 4 7 Road 6 100 5.17
## 5 10 Beehive 6 100 4.33
## 6 13 Food Reserve 6 100 5.5
## 7 15 Garden 5 100 4.2
## 8 20 Weathervane 6 100 3.83
cat("\n\nMETHODOLOGICAL RECOMMENDATIONS:\n")
##
##
## METHODOLOGICAL RECOMMENDATIONS:
cat(" 1. Q4 (3-point scale) showed bimodal distribution\n")
## 1. Q4 (3-point scale) showed bimodal distribution
cat(" → Consider expanding to 5-point scale for more nuance\n\n")
## → Consider expanding to 5-point scale for more nuance
cat(" 2. Q5 (removal impact) centered near zero\n")
## 2. Q5 (removal impact) centered near zero
cat(" → Question wording may be unclear; consider revision\n")
## → Question wording may be unclear; consider revision
cat(" → Or remove if not theoretically critical\n\n")
## → Or remove if not theoretically critical
cat(" 3. Q1 (rewrite) showed interesting explicit-implicit disconnect\n")
## 3. Q1 (rewrite) showed interesting explicit-implicit disconnect
cat(" → Retain for full study; very informative\n")
## → Retain for full study; very informative
cat(" → Develop clearer coding scheme before full study\n\n")
## → Develop clearer coding scheme before full study
cat(" 4. Attention/comprehension checks worked well\n")
## 4. Attention/comprehension checks worked well
cat(" → Retain same exclusion criteria\n\n")
## → Retain same exclusion criteria
cat(" 5. Sentence type differences minimal\n")
## 5. Sentence type differences minimal
cat(" → Could simplify design or test new moderators\n")
## → Could simplify design or test new moderators
8. Appendix: Full Model
Outputs
8.1 Q2 Models
cat("=== Q2: INTERPRETATION MODELS ===\n\n")
## === Q2: INTERPRETATION MODELS ===
cat("Model 1: Intercept only with random intercept for sentence\n")
## Model 1: Intercept only with random intercept for sentence
print(summary(model_q2))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Q2_Interpretation_bin ~ 1 + (1 | sentence)
## Data: df_analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 109.2 114.8 -52.6 105.2 119
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3170 0.4316 0.4316 0.4316 0.4316
##
## Random effects:
## Groups Name Variance Std.Dev.
## sentence (Intercept) 0 0
## Number of obs: 121, groups: sentence, 20
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6805 0.2499 6.726 1.75e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# --- Q2 appendix output: Model 2 (adds sentence_type as fixed effect) ---
# NOTE(review): this fit is also singular (sentence random-intercept
# variance 0 per the echoed output below); neither sentence-type contrast
# reaches significance.
cat("\n\nModel 2: With sentence type as fixed effect\n")
##
##
## Model 2: With sentence type as fixed effect
print(summary(model_q2_type))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Q2_Interpretation_bin ~ sentence_type + (1 | sentence)
## Data: df_analysis
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 109.1 120.3 -50.6 101.1 117
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4641 0.2887 0.3735 0.5606 0.5606
##
## Random effects:
## Groups Name Variance Std.Dev.
## sentence (Intercept) 0 0
## Number of obs: 121, groups: sentence, 20
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1575 0.3457 3.349 0.000812 ***
## sentence_typeAbility 0.8120 0.5562 1.460 0.144350
## sentence_typeOther 1.3275 0.8131 1.633 0.102561
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sntn_A
## sntnc_typAb -0.621
## sntnc_typOt -0.425 0.264
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Extract and compare
# Likelihood-ratio test of Model 1 vs Model 2 (intercept-only vs
# sentence-type). The echoed result below shows chisq(2) = 4.06, p = .13 —
# adding sentence type does not significantly improve fit.
cat("\n\nModel Comparison:\n")
##
##
## Model Comparison:
anova(model_q2, model_q2_type)
## Data: df_analysis
## Models:
## model_q2: Q2_Interpretation_bin ~ 1 + (1 | sentence)
## model_q2_type: Q2_Interpretation_bin ~ sentence_type + (1 | sentence)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_q2 2 109.20 114.79 -52.599 105.20
## model_q2_type 4 109.14 120.33 -50.571 101.14 4.0553 2 0.1316
8.2 Q3 Models
# --- Q3 (confidence) appendix output ---
# Summary of the lmer with by-sentence random intercepts and random slopes
# for Q2 interpretation. NOTE(review): the echoed output shows a
# slope-intercept correlation of -0.96, very close to the boundary —
# the random-effects structure may be over-parameterized for 20 sentences.
cat("=== Q3: CONFIDENCE MODELS ===\n\n")
## === Q3: CONFIDENCE MODELS ===
cat("Full model with random slopes and intercepts:\n")
## Full model with random slopes and intercepts:
print(summary(model_q3))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Confidence ~ Q2_Interpretation_bin + (1 + Q2_Interpretation_bin |
## sentence)
## Data: df_analysis
##
## REML criterion at convergence: 421.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9723 -0.4127 0.2077 0.6141 1.6331
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## sentence (Intercept) 1.193 1.092
## Q2_Interpretation_bin 2.133 1.460 -0.96
## Residual 1.623 1.274
## Number of obs: 121, groups: sentence, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.0095 0.4277 11.713
## Q2_Interpretation_bin 0.2583 0.4934 0.524
##
## Correlation of Fixed Effects:
## (Intr)
## Q2_Intrprt_ -0.940
# Check random effects
# VarCorr() prints just the variance components (SDs and correlations)
# of model_q3, without the fixed-effect table.
cat("\n\nRandom Effects Summary:\n")
##
##
## Random Effects Summary:
print(VarCorr(model_q3))
## Groups Name Std.Dev. Corr
## sentence (Intercept) 1.0921
## Q2_Interpretation_bin 1.4603 -0.959
## Residual 1.2740
8.3 Q4 Models
# --- Q4 (perceived impact) appendix output ---
# The cumulative link mixed model was fitted earlier wrapped in try();
# report it if it converged, otherwise fall back to printing the simple
# model and the sentence-fixed-effects model instead.
cat("=== Q4: PERCEIVED IMPACT MODELS ===\n\n")
## === Q4: PERCEIVED IMPACT MODELS ===
if (inherits(model_q4_mixed, "try-error")) {
  # Fallback path: mixed model did not fit — show non-mixed alternatives.
  cat("Simple model (no random effects):\n")
  print(summary(model_q4_simple))
  cat("\n\nModel with sentence fixed effects:\n")
  print(summary(model_q4_fixed))
} else {
  # Preferred path: the CLMM converged.
  cat("Mixed model converged:\n")
  print(summary(model_q4_mixed))
}
## Mixed model converged:
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: Q4_IsTrue_ord ~ Q2_Interpretation_bin + (1 + Q2_Interpretation_bin |
## sentence)
## data: df_analysis
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 121 -115.58 243.16 284(450) 1.25e-05 1.3e+03
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## sentence (Intercept) 0.05435 0.2331
## Q2_Interpretation_bin 0.06071 0.2464 -1.000
## Number of groups: sentence 20
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Q2_Interpretation_bin 0.1918 0.5013 0.383 0.702
##
## Threshold coefficients:
## Estimate Std. Error z value
## Not crucial|Somewhat important -0.2270 0.4702 -0.483
## Somewhat important|Absolutely essential 0.2090 0.4647 0.450
8.4 Q5 Models
# --- Q5 (removal impact) appendix output: Model 1 ---
# Intercept-only lmer on the centered removal rating. NOTE(review): the
# echoed summary reports a singular fit — by-sentence intercept variance
# is 0, so items do not differ detectably on this outcome.
cat("=== Q5: REMOVAL IMPACT MODELS ===\n\n")
## === Q5: REMOVAL IMPACT MODELS ===
cat("Model 1: Intercept only\n")
## Model 1: Intercept only
print(summary(model_q5))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Q5_Remove_centered ~ 1 + (1 | sentence)
## Data: df_analysis
##
## REML criterion at convergence: 471.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6440 -1.0537 -0.4634 0.7171 1.8976
##
## Random effects:
## Groups Name Variance Std.Dev.
## sentence (Intercept) 0.00 0.000
## Residual 2.87 1.694
## Number of obs: 121, groups: sentence, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.2149 0.1540 -1.395
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# --- Q5 appendix output: Model 2 (interpretation as predictor) ---
# NOTE(review): the echoed output shows near-zero random-effect variances
# and a slope-intercept correlation of -1.00 (singular fit) — the random
# slopes are not supported by these data.
cat("\n\nModel 2: With interpretation as predictor\n")
##
##
## Model 2: With interpretation as predictor
print(summary(model_q5_interp))
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## Q5_Remove_centered ~ Q2_Interpretation_bin + (1 + Q2_Interpretation_bin |
## sentence)
## Data: df_analysis
##
## REML criterion at convergence: 470.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8984 -1.0087 -0.4174 0.7652 1.9479
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## sentence (Intercept) 5.848e-09 7.647e-05
## Q2_Interpretation_bin 1.126e-08 1.061e-04 -1.00
## Residual 2.860e+00 1.691e+00
## Number of obs: 121, groups: sentence, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.2105 0.3880 0.543
## Q2_Interpretation_bin -0.5046 0.4226 -1.194
##
## Correlation of Fixed Effects:
## (Intr)
## Q2_Intrprt_ -0.918
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
8.5 Cross-Variable Models
# --- Cross-variable appendix output: confidence -> perceived impact ---
# Summary of an ordinal (cumulative link) regression of Q4 on Confidence —
# the echoed output below shows threshold coefficients and no random
# effects, consistent with a plain clm fitted earlier in the script.
cat("=== CROSS-VARIABLE RELATIONSHIP MODELS ===\n\n")
## === CROSS-VARIABLE RELATIONSHIP MODELS ===
cat("Confidence predicting Perceived Impact:\n")
## Confidence predicting Perceived Impact:
print(summary(model_conf_impact))
## formula: Q4_IsTrue_ord ~ Confidence
## data: df_analysis
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 121 -115.65 237.30 5(1) 7.19e-12 9.3e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Confidence -0.02706 0.12744 -0.212 0.832
##
## Threshold coefficients:
## Estimate Std. Error z value
## Not crucial|Somewhat important -0.52574 0.68915 -0.763
## Somewhat important|Absolutely essential -0.09115 0.68752 -0.133
# --- Cross-variable appendix output: perceived impact -> removal ---
# lmer of centered removal rating on as.numeric(Q4_IsTrue_ord).
# NOTE(review): coercing the ordinal to numeric assumes the three response
# categories are equally spaced — confirm this is intended, or model Q4
# as a factor instead.
cat("\n\nPerceived Impact predicting Removal Impact:\n")
##
##
## Perceived Impact predicting Removal Impact:
print(summary(model_impact_removal))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Q5_Remove_centered ~ as.numeric(Q4_IsTrue_ord) + (1 | sentence)
## Data: df_analysis
##
## REML criterion at convergence: 471.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7853 -0.9024 -0.3091 0.8775 2.0642
##
## Random effects:
## Groups Name Variance Std.Dev.
## sentence (Intercept) 0.000 0.000
## Residual 2.841 1.685
## Number of obs: 121, groups: sentence, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.7230 0.3721 -1.943
## as.numeric(Q4_IsTrue_ord) 0.2440 0.1628 1.499
##
## Correlation of Fixed Effects:
## (Intr)
## as.(Q4_IT_) -0.911
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
9. Data Export
# --- Export cleaned analysis data and summary tables ---
# Each output file is declared exactly once in `exports`, which drives both
# the write_csv() calls and the printed confirmation list. This fixes the
# drift hazard in the original, where every filename was duplicated between
# a write_csv() call and a separate hard-coded cat() line.
exports <- list(
  "pilot_analysis_data_cleaned.csv" = df_analysis,
  "summary_q2_overall.csv"          = q2_overall,
  "summary_q2_by_item.csv"          = q2_by_item,
  "summary_q3_by_item.csv"          = q3_by_item,
  "summary_q4_by_item.csv"          = q4_by_item,
  "summary_q5_by_item.csv"          = q5_by_item,
  "summary_item_comprehensive.csv"  = item_summary
)
# Write every table to the working directory (readr::write_csv).
for (file_name in names(exports)) {
  write_csv(exports[[file_name]], file_name)
}
# Confirmation message — same console output as before, but generated from
# the same list that was written, so it cannot go stale.
cat("=== DATA EXPORT COMPLETE ===\n")
cat("Files saved to working directory:\n")
cat(paste0(" - ", names(exports), "\n"), sep = "")